{ "cells": [ { "cell_type": "markdown", "id": "d240914c", "metadata": {}, "source": [ "## Checking datatypes" ] }, { "cell_type": "markdown", "id": "9a44c5ca", "metadata": {}, "source": [ "### Data Types in Python, Pandas and Numpy" ] }, { "cell_type": "markdown", "id": "b080e59d", "metadata": {}, "source": [ "| | Python | Pandas | Numpy |\n", "| --- | --- | ---- | --- |\n", "| integer | int | int64 | int_,int8,int16,int32,int64 |\n", "| float | float | float64 | float_,float16,float32,float64 |\n", "| string | str | object | string_ |\n", "| boolean | bool | bool | bool_ |" ] }, { "cell_type": "markdown", "id": "eefcce6d", "metadata": {}, "source": [ "### Note" ] }, { "cell_type": "markdown", "id": "42112bfa", "metadata": {}, "source": [ "- Python has no built-in date type (dates and times come from the standard-library `datetime` module)\n", "- Extra Datatypes in Pandas: datetime64[ns], timedelta64[ns], category\n", "- Extra Datatypes in Python: list, tuple, set, dictionary" ] }, { "cell_type": "code", "execution_count": 50, "id": "d380fa00", "metadata": {}, "outputs": [], "source": [ "# Dataframe for testing purposes\n", "import pandas as pd\n", "import numpy as np\n", "\n", "df=pd.DataFrame({'Name':['Sahil', 'Sonia', 'Sourav', 'Vishal'],\n", " 'Age':[20, 21, 19, 18]})\n", "\n", "# Series for testing purposes\n", "series1=pd.Series([1,2,3])\n", "\n", "# Numpy Array for testing purposes\n", "arr = np.array([1,2,3])" ] }, { "cell_type": "code", "execution_count": 51, "id": "f1c2a75f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Name</th>\n", " <th>Age</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " 
<td>Sahil</td>\n", " <td>20</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Sonia</td>\n", " <td>21</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Sourav</td>\n", " <td>19</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Vishal</td>\n", " <td>18</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Name Age\n", "0 Sahil 20\n", "1 Sonia 21\n", "2 Sourav 19\n", "3 Vishal 18" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 52, "id": "0430b712", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 1\n", "1 2\n", "2 3\n", "dtype: int64" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "series1" ] }, { "cell_type": "code", "execution_count": 53, "id": "72ce3fe1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 2, 3])" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "arr" ] }, { "cell_type": "markdown", "id": "8a13c25f", "metadata": {}, "source": [ "### Checking datatype" ] }, { "cell_type": "code", "execution_count": 61, "id": "ef313a19", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Name object\n", "Age int64\n", "dtype: object" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# For basic Python data types -> only type() can be used\n", "type('a') # <class 'str'>\n", "type(1) # <class 'int'>\n", "type(1.0) # <class 'float'>\n", "type(1.9) # <class 'float'>\n", "type([1,2,3]) # <class 'list'>\n", "type([1,2,3,'sahil']) # <class 'list'>\n", "type(True) # <class 'bool'>\n", "type(('a','b')) # <class 'tuple'>\n", "type({'a':'b'}) # <class 'dict'>\n", "\n", "\n", "# For other things like Numpy, Pandas (Series, Dataframe)\n", " # We can use both type() and .dtype (and .dtypes for a DataFrame; both are attributes, not methods)\n", " # type(numpy arr | series | df) will tell the type of container\n", " # .dtype 
-> will tell us the datatype of elements inside it, which is useful\n", " # .dtypes -> will tell the datatype of every dataframe column\n", "\n", "# For numpy array\n", "type(arr) # numpy.ndarray\n", "arr.dtype # int32 (platform-dependent default; often int64 on Linux/macOS)\n", "\n", "# For Series\n", "type(df.Name) # pandas.core.series.Series\n", "type(df.Age) # pandas.core.series.Series\n", "df.Name.dtype # object\n", "df.Age.dtype # 'int64'\n", "\n", "# For Dataframe\n", "type(df) # pandas.core.frame.DataFrame\n", "df.dtypes" ] }, { "cell_type": "markdown", "id": "946ba417", "metadata": {}, "source": [ "### Summary" ] }, { "cell_type": "markdown", "id": "334be9f6", "metadata": {}, "source": [ "- For Python's inbuilt basic datatypes\n", " - type()\n", "- For numpy and Pandas columns/series/dataframes\n", " - .dtype (array/Series) and .dtypes (DataFrame) are attributes, not methods; they tell the type of the elements (int, object, float)\n", " - type() will tell the type of the container (Series, ndarray, DataFrame)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }